********************************************************************
** Importing results from Machine Learning  - Lasso/Ridge/Elastic Net
clear
import delimited "$maindir/Results/Model_Outputs/predictpre_results_elastnet.csv", varnames(1) case(preserve)

gen meterend = mdy(end_month, end_day, end_year)
format meterend %d
drop end_day end_month end_year
order meterend, after(Household)

ds, has(type string) 
foreach x of varlist `r(varlist)' {
	replace `x' = "" if `x'=="NA"
	destring `x', replace
}

foreach x of varlist in_glmnet_0_50_All-cv_glmnet_1_200_All{
	egen temp=total(`x')
	replace `x' = 0 if temp==0
	drop temp
}


** merge with real usage
merge 1:1 Household meterend using "$maindir/Data/ihwap_anonym.dta", keepusing(total_mmbtu) nogen keep(3)
order total_mmbtu, after(meterend)

**** prediction errors
foreach x of varlist in_glmnet_0_50_All-cv_glmnet_1_200_All {
	egen temp=total(`x')
	gen r`x' = total_mmbtu - `x'
	replace r`x' = 0 if temp==0
	gen e`x' = 100*r`x'/total_mmbtu
	replace e`x' = 0 if temp==0
	drop temp
}


******* performance metrics

matrix alphas = (.)
matrix maxvars = (.)
matrix inrmse = (.)
* in sample RMSE
foreach x of varlist rin_* {
	gen alpha = 0 if regexm("`x'", "_0_")==1
	replace alpha = 0.25 if regexm("`x'", "_025_")==1
	replace alpha = 0.5 if regexm("`x'", "_05_")==1
	replace alpha = 0.75 if regexm("`x'", "_075_")==1
	replace alpha = 1 if regexm("`x'", "_1_")==1
	sum alpha
	matrix alpha = (`r(mean)')
	matrix alphas = (alphas \ alpha)

	gen maxvar = 50 if regexm("`x'", "_50_")==1
	replace maxvar = 75 if regexm("`x'", "_75_")==1
	replace maxvar = 100 if regexm("`x'", "_100_")==1
	replace maxvar = 150 if regexm("`x'", "_150_")==1
	replace maxvar = 200 if regexm("`x'", "_200_")==1
	sum maxvar
	matrix maxvar = (`r(mean)')
	matrix maxvars = (maxvars \ maxvar)
	
	gen sq`x' = `x'^2
	qui : sum sq`x'
	sca mean_sq_err = `r(mean)'
	di "MSE = " mean_sq_err
	sca rmse = sqrt(mean_sq_err)
	di "RMSE = " rmse
	matrix inrmse = (inrmse \ rmse)

	drop alpha maxvar sq`x'
}

matrix cvrmse = (.)
* cross validated RMSE
foreach x of varlist rcv_* {
	gen sq`x' = `x'^2
	qui : sum sq`x'
	sca mean_sq_err = `r(mean)'
	di "MSE = " mean_sq_err
	sca rmse = sqrt(mean_sq_err)
	di "RMSE = " rmse
	matrix cvrmse = (cvrmse \ rmse)
		
	drop sq`x'
}

matrix results = (alphas , maxvars , inrmse , cvrmse)
matrix results = results[2..., 1...]
mata : st_matrix("results", sort(st_matrix("results"), (1,2)))

matrix colnames results = "Alpha" "Max Variables" "In Sample RMSE" "Cross-Validated RMSE"
matrix rownames results = "Ridge" "Ridge" "Ridge" "Ridge" "Ridge" "Elastic Net" "Elastic Net" "Elastic Net" "Elastic Net" "Elastic Net" "Elastic Net" "Elastic Net" "Elastic Net" "Elastic Net" "Elastic Net" "Elastic Net" "Elastic Net" "Elastic Net" "Elastic Net" "Elastic Net" "Lasso" "Lasso" "Lasso" "Lasso" "Lasso"


esttab matrix(results, fmt(2 0 3 3)) ///
		using "$maindir/Results/Tables/elasticnet_results.tex", replace nomtitles



********************************************************************
** Importing results from Machine Learning  - xgboost
clear
import delimited "$maindir/Results/Model_Outputs/predictpre_results_xgboost.csv", varnames(1) case(preserve)

gen meterend = mdy(end_month, end_day, end_year)
format meterend %d
drop end_day end_month end_year
order meterend, after(Household)

** merge with real usage
merge 1:1 Household meterend using "$maindir/Data/ihwap_anonym.dta", keepusing(total_mmbtu) nogen keep(3)
order total_mmbtu, after(meterend)

**** prediction errors
foreach x of varlist in_xgb_1000_20_005_30_All-cv_xgb_2000_30_05_30_All {
	gen r`x' = total_mmbtu - `x'
	gen e`x' = 100*r`x'/total_mmbtu
}


******* performance metrics

matrix ntrees = (.)
matrix maxdepths = (.)
matrix shrinkages = (.)
matrix inrmse = (.)
* in sample RMSE
foreach x of varlist rin_* {
	gen ntree = 1000 if regexm("`x'", "_1000_")==1
	replace ntree = 2000 if regexm("`x'", "_2000_")==1
	sum ntree
	matrix ntree = (`r(mean)')
	matrix ntrees = (ntrees \ ntree)

	gen maxdepth = 20 if regexm("`x'", "_20_")==1
	replace maxdepth = 30 if regexm("`x'", "_20_")==0
	sum maxdepth
	matrix maxdepth = (`r(mean)')
	matrix maxdepths = (maxdepths \ maxdepth)
	
	gen shrinkage = 0.05 if regexm("`x'", "_005_")==1
	replace shrinkage = 0.5 if regexm("`x'", "_05_")==1
	sum shrinkage
	matrix shrinkage = (`r(mean)')
	matrix shrinkages = (shrinkages \ shrinkage)
	
	gen sq`x' = `x'^2
	qui : sum sq`x'
	sca mean_sq_err = `r(mean)'
	di "MSE = " mean_sq_err
	sca rmse = sqrt(mean_sq_err)
	di "RMSE = " rmse
	matrix inrmse = (inrmse \ rmse)

	drop ntree maxdepth shrinkage sq`x'
}

matrix cvrmse = (.)
matrix minpernode = (.)
* cross validated RMSE
foreach x of varlist rcv_* {
	gen sq`x' = `x'^2
	qui : sum sq`x'
	sca mean_sq_err = `r(mean)'
	di "MSE = " mean_sq_err
	sca rmse = sqrt(mean_sq_err)
	di "RMSE = " rmse
	matrix cvrmse = (cvrmse \ rmse)
	matrix minpernode = (minpernode \ 30)
		
	drop sq`x'
}

matrix results = (ntrees , maxdepths , shrinkages , minpernode, inrmse , cvrmse)
matrix results = results[2..., 1...]
*mata : st_matrix("results", sort(st_matrix("results"), (1,2)))

matrix colnames results = "N Trees" "Max Tree Depth" "Shrinkage" "Min Obs per Node" "In Sample RMSE" "Cross-Validated RMSE"
matrix rownames results = "1" "2" "3" "4" "5" "6" "7" "8"

esttab matrix(results, fmt(0 0 2 0 3 3)) ///
		using "$maindir/Results/Tables/xgboost_results.tex", replace nomtitles



********************************************************************
** Importing results from Machine Learning  - random forests
clear
import delimited "$maindir/Results/Model_Outputs/predictpre_results_rf.csv", varnames(1) case(preserve)

gen meterend = mdy(end_month, end_day, end_year)
format meterend %d
drop end_day end_month end_year
order meterend, after(Household)

** merge with real usage
merge 1:1 Household meterend using "$maindir/Data/ihwap_anonym.dta", keepusing(total_mmbtu) nogen keep(3)
order total_mmbtu, after(meterend)

**** prediction errors
foreach x of varlist in_randomForest_1000_30_500_All-cv_randomForest_2000_30_1000_All {
	rename `x' `=subinstr("`x'", "_All", "", .)'
}

foreach x of varlist in_randomForest_1000_30_500-cv_randomForest_2000_30_1000 {
	gen r`x' = total_mmbtu - `x'
	gen e`x' = 100*r`x'/total_mmbtu
}


******* performance metrics

matrix ntrees = (.)
matrix maxdepths = (.)
matrix inrmse = (.)
* in sample RMSE
foreach x of varlist rin_* {
	gen ntree = 2000 if regexm("`x'", "_2000_")==1
	replace ntree = 1000 if regexm("`x'", "_2000_")==0
	sum ntree
	matrix ntree = (`r(mean)')
	matrix ntrees = (ntrees \ ntree)

	gen maxdepth = 500 if regexm("`x'", "_500")==1
	replace maxdepth = 1000 if regexm("`x'", "_500")==0
	sum maxdepth
	matrix maxdepth = (`r(mean)')
	matrix maxdepths = (maxdepths \ maxdepth)
	
	gen sq`x' = `x'^2
	qui : sum sq`x'
	sca mean_sq_err = `r(mean)'
	di "MSE = " mean_sq_err
	sca rmse = sqrt(mean_sq_err)
	di "RMSE = " rmse
	matrix inrmse = (inrmse \ rmse)

	drop ntree maxdepth sq`x'
}

matrix cvrmse = (.)
matrix minpernode = (.)
* cross validated RMSE
foreach x of varlist rcv_* {
	gen sq`x' = `x'^2
	qui : sum sq`x'
	sca mean_sq_err = `r(mean)'
	di "MSE = " mean_sq_err
	sca rmse = sqrt(mean_sq_err)
	di "RMSE = " rmse
	matrix cvrmse = (cvrmse \ rmse)
	matrix minpernode = (minpernode \ 30)
		
	drop sq`x'
}

matrix results = (ntrees , maxdepths , minpernode, inrmse , cvrmse)
matrix results = results[2..., 1...]

matrix colnames results = "N Trees" "Max Nodes" "Min Obs per Node" "In Sample RMSE" "Cross-Validated RMSE"
matrix rownames results = "1" "2" "3" "4"

esttab matrix(results, fmt(0 0 0 3 3)) ///
		using "$maindir/Results/Tables/randomforest_results.tex", replace nomtitles
